In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
import altair as alt
from vega_datasets import data
# Lift Altair's max-rows check for this session; a later cell charts a 20,000-row
# sample. As the cell's last expression, the call's return value is what gets displayed.
alt.data_transformers.disable_max_rows()
Out[1]:
DataTransformerRegistry.enable('default')
In [2]:
# Load the seaborn diamonds data and give the x / y / z columns readable names.
# rename() returns a new frame, so nothing is mutated in place on a re-run.
d = sb.load_dataset('diamonds').rename(
    columns = {'x': 'length', 'y': 'width', 'z': 'depth_in_mm'}
)

# Work on a 4,000-row sample drawn without replacement. random_state pins the draw
# so a fresh "Restart & Run All" reproduces the same rows, and therefore the same charts.
df = d.sample(n = 4000, replace = False, ignore_index = True, random_state = 42)
df.head()
Out[2]:
carat cut color clarity depth table price length width depth_in_mm
0 1.24 Ideal G VS2 62.1 56.0 8504 6.87 6.91 4.28
1 0.42 Ideal F SI1 60.9 55.0 992 4.88 4.84 2.96
2 1.26 Premium G SI2 59.1 59.0 5899 7.09 7.05 4.18
3 1.70 Ideal F VS2 62.4 57.0 17360 7.65 7.57 4.75
4 0.70 Very Good E VS1 62.0 60.0 3109 5.61 5.64 3.49
In [3]:
# Selections used by this dashboard: an interval selection and a point selection
# that is keyed on the `cut` field.
brush = alt.selection_interval()
click = alt.selection_point(fields=['cut'])

# Hand-built legend: squares positioned by cut that keep their cut colour while
# they match `click` and fall back to light grey otherwise.
legend_axis = alt.Axis(orient='right', labelFontWeight=600)
legend_fill = alt.condition(
    click,
    alt.Color('cut:N', legend=None),
    alt.value('lightgrey'),
)
legend = (
    alt.Chart(df)
    .mark_square(size=85)
    .encode(
        y=alt.Y('cut', title=None, axis=legend_axis),
        color=legend_fill,
    )
    .properties(title='Cut')
    .add_params(click)
)

# Carat-vs-price scatter. Points matching `brush` keep their cut colour and the
# rest turn light grey; the data is additionally filtered by `click`.
scatter_encodings = dict(
    x=alt.X('carat:Q', title='Carat', axis=alt.Axis(grid=False)),
    y=alt.Y('price:Q', title='Price', axis=alt.Axis(format='$s', grid=False)),
    color=alt.condition(
        brush,
        alt.Color('cut:N', legend=None),
        alt.value('lightgrey'),
    ),
    opacity=alt.condition(click, alt.value(1), alt.value(0)),
)
scatter = (
    alt.Chart(df, width=350, height=360)
    .mark_circle(size=35)
    .encode(**scatter_encodings)
    .properties(title='Carat vs Price')
    .add_params(brush, click)
    .transform_filter(click)
)

def distribution_bar(field, chart_title):
    """Build a horizontal count bar chart for one categorical column of `df`.

    The chart is coloured by cut, filtered by `brush`, and draws the bars that
    match `click` at opacity 1 and all others at opacity 0. It reads the
    notebook-level names `df`, `brush` and `click` at call time.

    Parameters
    ----------
    field : str
        Column of `df` placed on the y axis (encoded as nominal).
    chart_title : str
        Title shown above the chart.

    Returns
    -------
    alt.Chart
    """
    column = df[field]
    present = set(column.dropna().unique())
    # The y domain is pinned explicitly. Deriving it from unique() alone would order
    # the axis by first appearance in the random sample, so use the categorical
    # level order when there is one and a plain sort otherwise.
    if isinstance(column.dtype, pd.CategoricalDtype):
        levels = [level for level in column.cat.categories if level in present]
    else:
        levels = sorted(present)

    return (
        alt.Chart(df, width = 350)
        .mark_bar()
        .encode(
            x = alt.X('count()', stack = None, title = None, axis = alt.Axis(grid = False, format = 's')),
            y = alt.Y(f'{field}:N', title = None, scale = alt.Scale(domain = levels)),
            color = alt.Color('cut', legend = None),
            opacity = alt.condition(click, alt.value(1), alt.value(0)),
        )
        .properties(title = chart_title)
        .transform_filter(brush)
        .add_params(click)
    )


# Clarity Bar Chart
bar1 = distribution_bar('clarity', 'Distribution of Clarity')

# Color Bar Chart
bar2 = distribution_bar('color', 'Distribution of Color')



# Combine the scatter, the pair of bar charts and the legend into one display,
# then apply the shared view and title configuration.
cut_dashboard = scatter | (bar1 & bar2) | legend
cut_dashboard.configure_view(strokeWidth=0).configure_title(fontSize=12)
Out[3]:
In [4]:
# Format selections: parameters bound to input widgets (colour picker and sliders)
# that the scatter plots below use for mark colour, mark size and title font size.
color = alt.param(value = 'steelblue', bind = alt.binding(input = 'color', name = 'Color: '))
size = alt.param(value = 35, bind = alt.binding(input = 'range', min = 10, max = 80, step = 5, name = 'Size: '))
titlesize = alt.param(value = 12, bind = alt.binding(input = 'range', min = 5, max = 18, step = 1, name = 'Title Size: '))

# Selection: a point selection on (cut, clarity), triggered by pointerover rather
# than a click. No interval selection is defined because no chart in this cell uses one.
click = alt.selection_point(fields = ['cut', 'clarity'], on = 'pointerover')

# Creating legend: a cut x clarity grid of rects coloured by row count while they
# match `click`, and light grey otherwise.
legend = alt.Chart(df).mark_rect().encode(
    x = alt.X('cut', title = None, axis = alt.Axis(labelFontWeight = 600)),
    y = alt.Y('clarity', title = None, axis = alt.Axis(orient = 'right', labelFontWeight = 600)),
    color = alt.condition(click, alt.Color('count()', legend = None), alt.value('lightgrey'))
).properties(title = alt.TitleParams('Cut and Clarity', fontSize = 11)).add_params(click)

def carat_scatter(y_field, y_title, chart_title, **y_options):
    """Build a point chart of `y_field` against carat.

    Mark colour, mark size and title font size come from the `color`, `size` and
    `titlesize` parameters; points matching `click` are drawn at opacity 1 and all
    others at opacity 0. The helper reads the notebook-level names `df`, `color`,
    `size`, `titlesize` and `click` at call time, so call it in the cell that
    defines them.

    Parameters
    ----------
    y_field : str
        Column of `df` plotted on the y axis.
    y_title : str
        Y-axis title.
    chart_title : str
        Text of the chart title.
    **y_options
        Extra keyword arguments forwarded to `alt.Y` (for example `axis=...`).

    Returns
    -------
    alt.Chart
    """
    return (
        alt.Chart(df)
        .mark_point(color = color, size = size)
        .encode(
            x = alt.X('carat', title = 'Carat'),
            y = alt.Y(y_field, title = y_title, **y_options),
            opacity = alt.condition(click, alt.value(1), alt.value(0)),
        )
        .properties(title = alt.TitleParams(chart_title, fontSize = titlesize))
        .add_params(click, color, size, titlesize)
    )


scatter1 = carat_scatter('price', 'Price', 'Price vs Carat', axis = alt.Axis(format = '$s'))

scatter2 = carat_scatter('length', 'Length', 'Length vs Carat')

# Combine both scatter plots with the cut/clarity legend, then turn off axis grids
# and set the view stroke width to 0 for the whole display.
hover_dashboard = scatter1 | scatter2 | legend
hover_dashboard.configure_axis(grid=False).configure_view(strokeWidth=0)
Out[4]:
In [5]:
# Draw a 20,000-row sample for this cell's charts. random_state pins the draw so
# re-running the notebook reproduces the same rows.
df = d.sample(n = 20000, replace = False, ignore_index = True, random_state = 42)


def ordered_levels(column):
    """Return the distinct non-null values of `column` in a stable order.

    Categorical columns keep their declared category order (restricted to the
    levels actually present); any other column is sorted. This avoids the
    first-appearance order of `unique()`, which depends on how the sample happened
    to be shuffled.
    """
    present = set(column.dropna().unique())
    if isinstance(column.dtype, pd.CategoricalDtype):
        return [level for level in column.cat.categories if level in present]
    return sorted(present)


# Binding cut and clarity: one radio group per field, with a leading None option
# that is labelled 'All'.
cut = ordered_levels(df['cut'])
button1 = alt.binding_radio(name = 'Cut type: ', options = [None] + cut, labels = ['All'] + cut )
clarity = ordered_levels(df['clarity'])
button2 = alt.binding_radio(name = 'clarity type: ', options = [None] + clarity, labels = ['All'] + clarity )

# Selection point with both radio buttons
click = alt.selection_point(fields = ['cut', 'clarity'], bind = {'cut':button1, 'clarity':button2})


def filtered_histogram(field, axis_title, chart_title):
    """Build a binned count histogram of one quantitative column of `df`.

    Bars are filled by cut and stroked by clarity while they match `click`
    (white otherwise), and the data is also filtered by `click`. The helper reads
    the notebook-level names `df` and `click` at call time.

    Parameters
    ----------
    field : str
        Quantitative column of `df` to bin on the x axis (at most 40 bins).
    axis_title : str
        X-axis title.
    chart_title : str
        Title shown above the chart.

    Returns
    -------
    alt.Chart
    """
    return (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x = alt.X(f'{field}:Q', title = axis_title, bin = alt.Bin(maxbins = 40)),
            y = alt.Y('count()', title = 'Frequency', stack = None),
            color = alt.condition(click, alt.Color('cut:N', legend = None), alt.value('white')),
            stroke = alt.condition(click, alt.Color('clarity:N', legend = None), alt.value('white')),
            opacity = alt.condition(click, alt.value(1), alt.value(0)),
        )
        .properties(title = chart_title)
        .add_params(click)
        .transform_filter(click)
    )


hist1 = filtered_histogram('table', 'Table', 'Diamond Table Distribution')

hist2 = filtered_histogram('length', 'Length', 'Diamond Length Distribution')

# Length-vs-table scatter, filtered by the radio-button selection. Neither axis
# is forced to include zero.
scatter = (
    alt.Chart(df, title='Diamond Length vs Table')
    .mark_circle(size=40)
    .encode(
        x=alt.X('table', title='Table', scale=alt.Scale(zero=False)),
        y=alt.Y('length', title='Length', scale=alt.Scale(zero=False)),
    )
    .transform_filter(click)
    .add_params(click)
)



# Combine the scatter with both histograms, then turn off axis grids and set the
# view stroke width to 0 for the whole display.
radio_dashboard = scatter | hist1 | hist2
radio_dashboard.configure_axis(grid=False).configure_view(strokeWidth=0)
Out[5]:
In [6]:
# Draw a 4,000-row sample for this cell. random_state pins the draw so re-running
# the notebook reproduces the same rows.
df = d.sample(n = 4000, replace = False, ignore_index = True, random_state = 42)

# Title controls.
# NOTE(review): the initial value 'serif' is not one of the radio options, so no
# button appears to start out selected - confirm this is intended.
titlefont = alt.param(value = 'serif', bind = alt.binding(input = 'radio', 
                                                          options = ['Georgia', 'Arial', 'Verdana', 'Times New Roman'], 
                                                          name = 'Title font '))
titlesize = alt.param(value = 14, bind = alt.binding(input = 'range', min = 5, max = 20, step = 1, name = 'Title size '))
titleweight = alt.param(value = 700, bind = alt.binding(input = 'range', min = 500, max = 900, step = 200, name = 'Title weight '))
titledx = alt.param(value = 0, bind = alt.binding(input = 'range', min = -80, max = 80, step = 1, name = 'Title dx '))
titlecolor = alt.param(value = 'black', bind = alt.binding(input = 'color', name = 'Title color '))

# Bar mark controls.
size = alt.param(value = 30, bind = alt.binding(input = 'range', min = 5, max = 30, step = 1, name = 'Bar size '))
color = alt.param(value = 'steelblue', bind = alt.binding(input = 'color', name = 'Bar Color '))
strokecolor = alt.param(value = 'steelblue', bind = alt.binding(input = 'color', name = 'Stroke color '))
opacity = alt.param(value = 1, bind = alt.binding(input = 'range', min = 0, max = 1, step = 0.05, name = 'Opacity '))

# Axis label controls.
xsize = alt.param(value = 10, bind = alt.binding(input = 'range', min = 5, max = 20, step = 1, name = 'Axis size '))
xcolor = alt.param(value = 'black', bind = alt.binding(input = 'color', name = 'Axis color '))

# Text label controls. The horizontal `align` mark property accepts 'left',
# 'center' or 'right'; 'middle' is a baseline value, not an align value, so the
# centre option is spelled 'center'.
textalign = alt.param(value = 'left', bind = alt.binding(input = 'select', options = ['left', 'center', 'right'], name = 'Text align '))
textdx = alt.param(value = 3, bind = alt.binding(input = 'range', min = -20, max = 20, step = 1, name = 'Text dx '))
textdy = alt.param(value = 0, bind = alt.binding(input = 'range', min = -20, max = 20, step = 1, name = 'Text dy '))
textangle = alt.param(value = 0, bind = alt.binding(input = 'range', min = -180, max = 180, step = 1, name = 'Text angle'))

# Chart title: the text is fixed, while offset, font, size, weight and colour each
# come from one of the title parameters.
title_style = dict(
    dx=titledx,
    font=titlefont,
    fontSize=titlesize,
    fontWeight=titleweight,
    color=titlecolor,
)
title = alt.TitleParams('Total Number of Diamond Cut Variations', **title_style)

# Bar chart of row counts per cut. The x axis is hidden; the y-axis labels and the
# bar mark take their styling from the parameters.
cut_axis = alt.Axis(
    labelFont=titlefont,
    labelFontWeight=titleweight,
    ticks=False,
    labelPadding=5,
    labelFontSize=xsize,
    labelColor=xcolor,
)
bar_params = [
    titlefont, titlesize, titleweight, titledx, titlecolor,
    xsize, xcolor,
    size, color,
    strokecolor, opacity,
]
bar = (
    alt.Chart(df, width=400, height=160, title=title)
    .mark_bar(size=size, color=color, stroke=strokecolor, opacity=opacity)
    .encode(
        x=alt.X('count()', axis=None),
        y=alt.Y('cut', title=None, axis=cut_axis),
    )
    .add_params(*bar_params)
)

# Count labels: start from the bar chart, switch the mark to text and add a text
# encoding for the count. Alignment, offsets and angle come from the text parameters.
label_style = dict(
    align=textalign,
    dx=textdx,
    dy=textdy,
    angle=textangle,
    fontWeight=titleweight,
    font=titlefont,
)
text = (
    bar.mark_text(**label_style)
    .encode(text=alt.Text('count()'))
    .add_params(textalign, textdx, textdy, textangle)
)

# Layer the labels on the bars and display the result with a zero-width view stroke.
chart = bar + text

chart.configure_view(strokeWidth=0)
Out[6]:
In [ ]: